

# Setup ------------------------------------------------------------------------
  # Session options: do not convert strings to factors by default
  options(stringsAsFactors = FALSE)
  # Packages used by this script
  library(data.table)
  library(magrittr)
  library(lubridate)
  library(stringr)
  # Directories: project root, and the folder holding the CSV files
  # (both paths end in "/" so file names can be appended with paste0)
  dir_project <- "/Users/edwardarubin/Dropbox/Research/MyProjects/NaturalGas/"
  dir_csv     <- paste0(dir_project, "DataCsv/")
  # Read the SoCalGas baseline allowances into a data.table
  socal_dt <- fread(input = paste0(dir_csv, "socalBaselines.csv"))


# Expand to month-level --------------------------------------------------------
  # Convert date_effective (month/day/year text) to Date class
  socal_dt[, date_effective := as.Date(mdy(date_effective))]
  # Add numeric months (1-12) by looking each month name up in month.name.
  # The lookup leaves row order untouched and gives NA for a name that is not
  # in month.name.
  socal_dt[, month_end_int := match(month_end, month.name)]
  socal_dt[, month_start_int := match(month_start, month.name)]
  # Fail early rather than carry NA month numbers into the output files
  if (anyNA(socal_dt$month_start_int) || anyNA(socal_dt$month_end_int)) {
    stop("month_start and month_end must be full English month names ",
      "(see month.name)", call. = FALSE)
  }
  # Calculate season lengths (number of months a season spans, counting both
  # the start month and the end month)
  # Only "summer" and "winter" are understood; stop with a clear message on
  # anything else instead of failing later on an undefined value
  unknown_seasons <- setdiff(unique(socal_dt$season), c("summer", "winter"))
  if (length(unknown_seasons) > 0) {
    stop("Unrecognized season value(s): ",
      paste(unknown_seasons, collapse = ", "), call. = FALSE)
  }
  # Summer months start and stop in the same year. Winter months end in the
  # year following their beginning, so the end month is shifted 12 months
  # later before counting.
  season_lengths <- socal_dt[,
    month_end_int + 12L * (season == "winter") - month_start_int + 1L]
  # A missing or non-positive length means the start/end months are unusable
  # (e.g., a summer season whose end month precedes its start month)
  if (anyNA(season_lengths) || any(season_lengths < 1L)) {
    stop("Could not compute a positive season length for every row; ",
      "check month_start, month_end, and season", call. = FALSE)
  }
  # Add the lengths to the dataset
  socal_dt[, season_length := season_lengths]
  # Duplicate each row by the length of the season
  # (seq_len is safe when the table has zero rows; as.integer drops any names)
  months_per_row <- as.integer(socal_dt$season_length)
  socal_dt <- socal_dt[rep(seq_len(nrow(socal_dt)), times = months_per_row)]
  # Add sequence of numeric months: each original row's copies count up from
  # its start month. sequence() restarts at 1 for every original row, so this
  # does not depend on any set of columns uniquely identifying a row.
  socal_dt[, month := month_start_int + sequence(months_per_row) - 1L]
  # Fix months greater than 12 (counted past December)
  socal_dt[month > 12L, month := month - 12L]
  # Drop unwanted columns: the month-name columns and the helper length column
  socal_dt[, c("month_start", "month_end", "season_length") := NULL]
  # Change names: the numeric month columns take over the original names
  setnames(socal_dt,
    old = c("month_start_int", "month_end_int"),
    new = c("month_start", "month_end"))
  # Change column order
  setcolorder(socal_dt, c("date_effective", "month", "climate_zone",
    "season", "month_start", "month_end", "allowance"))
  # Save the new dataset (no row-name column in the file)
  write.csv(x = socal_dt,
    file = paste0(dir_csv, "socalBaselinesMonthly.csv"),
    row.names = FALSE)


# Build a simple crosswalk -----------------------------------------------------
  # NOTE: This crosswalk is for 2007 to 2016
  # Load socalBaselinesMonthly.csv
  monthly_dt <- fread(paste0(dir_csv, "socalBaselinesMonthly.csv"))
  # Grab subset (any year from 2007 to 2016 will do)
  crosswalk_date <- "2016-01-01"
  simple_dt <- monthly_dt[date_effective == crosswalk_date]
  # Stop instead of silently writing an empty crosswalk file
  if (nrow(simple_dt) == 0L) {
    stop("No rows in socalBaselinesMonthly.csv with date_effective == ",
      crosswalk_date, call. = FALSE)
  }
  # Drop unwanted variables: date effective, start/end months
  simple_dt[, c("date_effective", "month_start", "month_end") := NULL]
  # Sort by climate zone and month
  setorder(simple_dt, climate_zone, month)
  # Save (no row-name column in the file)
  write.csv(x = simple_dt,
    file = paste0(dir_csv, "socalBaselinesSimple.csv"),
    row.names = FALSE)
